{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert a pandas dataframe to geojson for web-mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd, requests, json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First download data from the city of Berkeley's API. You can use Socrata's $limit parameter to specify how many rows to grab (otherwise the default is 1,000 rows of data): https://dev.socrata.com/docs/paging.html\n",
    "\n",
    "Example request: https://data.cityofberkeley.info/resource/k489-uv4i.json?$limit=5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# API endpoint for city of Berkeley's 311 calls\n",
    "endpoint_url = 'https://data.cityofberkeley.info/resource/k489-uv4i.json?$limit=2000'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# fetch the URL and load the data\n",
    "response = requests.get(endpoint_url)\n",
    "data = response.json()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, turn the json data into a dataframe and clean it up a bit: drop unnecessary columns and any rows that lack lat-long data. We want to make our json file as small as possible (prefer under 5 mb) so that it can be loaded over the Internet to anyone viewing your map, without taking forever to download a huge file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We have 2000 rows\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"['apn', 'city', 'indbdate', 'issue_description', 'issue_type', 'latitude', 'location', 'longitude', 'neighborhood_district', 'object_type', 'secondary_issue_type', 'state', 'street_address', 'ticket_closed_date_time', 'ticket_created_date_time', 'ticket_id', 'ticket_status']\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# turn the json data into a dataframe and see how many rows and what columns we have\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "print('We have {} rows'.format(len(df)))\n",
    "str(df.columns.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>apn</th>\n",
       "      <th>city</th>\n",
       "      <th>indbdate</th>\n",
       "      <th>issue_description</th>\n",
       "      <th>issue_type</th>\n",
       "      <th>latitude</th>\n",
       "      <th>location</th>\n",
       "      <th>longitude</th>\n",
       "      <th>neighborhood_district</th>\n",
       "      <th>object_type</th>\n",
       "      <th>secondary_issue_type</th>\n",
       "      <th>state</th>\n",
       "      <th>street_address</th>\n",
       "      <th>ticket_closed_date_time</th>\n",
       "      <th>ticket_created_date_time</th>\n",
       "      <th>ticket_id</th>\n",
       "      <th>ticket_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>2016-10-13T04:17:52</td>\n",
       "      <td>Parking Meter Repair</td>\n",
       "      <td>Traffic and Transportation</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>Individual</td>\n",
       "      <td>Parking</td>\n",
       "      <td>CA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2011-08-23T11:27:06</td>\n",
       "      <td>2011-03-01T12:48:53</td>\n",
       "      <td>121000042212</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>054 170801100</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>2016-10-13T04:13:37</td>\n",
       "      <td>Residential Service Start</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.86165226</td>\n",
       "      <td>{'longitude': '-122.25267724', 'latitude': '37...</td>\n",
       "      <td>-122.25267724</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>Property</td>\n",
       "      <td>Residential</td>\n",
       "      <td>CA</td>\n",
       "      <td>2717 FOREST AVE</td>\n",
       "      <td>2013-05-09T09:35:31</td>\n",
       "      <td>2013-05-07T13:00:10</td>\n",
       "      <td>121000141381</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>053 160400800</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>2016-10-13T04:18:09</td>\n",
       "      <td>Residential Lost or Stolen Cart</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.85527017</td>\n",
       "      <td>{'longitude': '-122.27364915', 'latitude': '37...</td>\n",
       "      <td>-122.27364915</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>Property</td>\n",
       "      <td>Residential</td>\n",
       "      <td>CA</td>\n",
       "      <td>2910 ELLIS ST</td>\n",
       "      <td>2014-10-01T17:32:04</td>\n",
       "      <td>2014-09-25T09:33:44</td>\n",
       "      <td>121000214450</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>052 143700100</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>2016-10-13T04:13:29</td>\n",
       "      <td>Commercial Cart Size Decrease</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.84693953</td>\n",
       "      <td>{'longitude': '-122.27597171', 'latitude': '37...</td>\n",
       "      <td>-122.27597171</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>Property</td>\n",
       "      <td>Commercial</td>\n",
       "      <td>CA</td>\n",
       "      <td>1538 SIXTY-THIRD ST</td>\n",
       "      <td>2015-09-18T11:51:12</td>\n",
       "      <td>2015-07-06T09:14:01</td>\n",
       "      <td>121000252506</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>057 202901300</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>2016-10-13T04:13:26</td>\n",
       "      <td>Illegal Dumping - City Property</td>\n",
       "      <td>Streets, Utilities, and Transportation</td>\n",
       "      <td>37.86833388</td>\n",
       "      <td>{'longitude': '-122.26742331', 'latitude': '37...</td>\n",
       "      <td>-122.26742331</td>\n",
       "      <td>Berkeley</td>\n",
       "      <td>Property</td>\n",
       "      <td>Clean City Program</td>\n",
       "      <td>CA</td>\n",
       "      <td>2255 SHATTUCK AVE</td>\n",
       "      <td>2015-06-25T08:33:30</td>\n",
       "      <td>2015-06-24T09:14:52</td>\n",
       "      <td>121000251283</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             apn      city             indbdate  \\\n",
       "0            NaN  Berkeley  2016-10-13T04:17:52   \n",
       "1  054 170801100  Berkeley  2016-10-13T04:13:37   \n",
       "2  053 160400800  Berkeley  2016-10-13T04:18:09   \n",
       "3  052 143700100  Berkeley  2016-10-13T04:13:29   \n",
       "4  057 202901300  Berkeley  2016-10-13T04:13:26   \n",
       "\n",
       "                 issue_description                              issue_type  \\\n",
       "0             Parking Meter Repair              Traffic and Transportation   \n",
       "1        Residential Service Start                    Refuse and Recycling   \n",
       "2  Residential Lost or Stolen Cart                    Refuse and Recycling   \n",
       "3    Commercial Cart Size Decrease                    Refuse and Recycling   \n",
       "4  Illegal Dumping - City Property  Streets, Utilities, and Transportation   \n",
       "\n",
       "      latitude                                           location  \\\n",
       "0          NaN                                                NaN   \n",
       "1  37.86165226  {'longitude': '-122.25267724', 'latitude': '37...   \n",
       "2  37.85527017  {'longitude': '-122.27364915', 'latitude': '37...   \n",
       "3  37.84693953  {'longitude': '-122.27597171', 'latitude': '37...   \n",
       "4  37.86833388  {'longitude': '-122.26742331', 'latitude': '37...   \n",
       "\n",
       "       longitude neighborhood_district object_type secondary_issue_type state  \\\n",
       "0            NaN              Berkeley  Individual              Parking    CA   \n",
       "1  -122.25267724              Berkeley    Property          Residential    CA   \n",
       "2  -122.27364915              Berkeley    Property          Residential    CA   \n",
       "3  -122.27597171              Berkeley    Property           Commercial    CA   \n",
       "4  -122.26742331              Berkeley    Property   Clean City Program    CA   \n",
       "\n",
       "        street_address ticket_closed_date_time ticket_created_date_time  \\\n",
       "0                  NaN     2011-08-23T11:27:06      2011-03-01T12:48:53   \n",
       "1      2717 FOREST AVE     2013-05-09T09:35:31      2013-05-07T13:00:10   \n",
       "2        2910 ELLIS ST     2014-10-01T17:32:04      2014-09-25T09:33:44   \n",
       "3  1538 SIXTY-THIRD ST     2015-09-18T11:51:12      2015-07-06T09:14:01   \n",
       "4    2255 SHATTUCK AVE     2015-06-25T08:33:30      2015-06-24T09:14:52   \n",
       "\n",
       "      ticket_id ticket_status  \n",
       "0  121000042212        Closed  \n",
       "1  121000141381        Closed  \n",
       "2  121000214450        Closed  \n",
       "3  121000252506        Closed  \n",
       "4  121000251283        Closed  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# convert lat-long to floats and change address from ALL CAPS to Regular Capitalization\n",
    "df['latitude'] = df['latitude'].astype(float)\n",
    "df['longitude'] = df['longitude'].astype(float)\n",
    "df['street_address'] = df['street_address'].str.title()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# we don't need all those columns - only keep useful ones\n",
    "useful_cols = ['issue_description', 'issue_type', 'latitude', 'longitude', 'street_address', 'ticket_status']\n",
    "df_subset = df[useful_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We have 985 geotagged rows\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>issue_description</th>\n",
       "      <th>issue_type</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>street_address</th>\n",
       "      <th>ticket_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1989</th>\n",
       "      <td>Residential Missed Pickup</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.884110</td>\n",
       "      <td>-122.248453</td>\n",
       "      <td>1438 Grizzly Peak Blvd</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1991</th>\n",
       "      <td>Residential Bulky Pickup</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.857363</td>\n",
       "      <td>-122.274100</td>\n",
       "      <td>1726 Stuart St</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1992</th>\n",
       "      <td>Commercial Site Inspection</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.854159</td>\n",
       "      <td>-122.259605</td>\n",
       "      <td>3075 Telegraph Ave Com</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1993</th>\n",
       "      <td>Miscellaneous Internet Request</td>\n",
       "      <td>General Questions/information</td>\n",
       "      <td>37.869606</td>\n",
       "      <td>-122.273367</td>\n",
       "      <td>2100 M L King Jr Way</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1995</th>\n",
       "      <td>Commercial Bin Size Increase</td>\n",
       "      <td>Refuse and Recycling</td>\n",
       "      <td>37.851065</td>\n",
       "      <td>-122.291845</td>\n",
       "      <td>2950 Seventh St</td>\n",
       "      <td>Closed</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   issue_description                     issue_type  \\\n",
       "1989       Residential Missed Pickup           Refuse and Recycling   \n",
       "1991        Residential Bulky Pickup           Refuse and Recycling   \n",
       "1992      Commercial Site Inspection           Refuse and Recycling   \n",
       "1993  Miscellaneous Internet Request  General Questions/information   \n",
       "1995    Commercial Bin Size Increase           Refuse and Recycling   \n",
       "\n",
       "       latitude   longitude          street_address ticket_status  \n",
       "1989  37.884110 -122.248453  1438 Grizzly Peak Blvd        Closed  \n",
       "1991  37.857363 -122.274100          1726 Stuart St        Closed  \n",
       "1992  37.854159 -122.259605  3075 Telegraph Ave Com        Closed  \n",
       "1993  37.869606 -122.273367    2100 M L King Jr Way        Closed  \n",
       "1995  37.851065 -122.291845         2950 Seventh St        Closed  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop any rows that lack lat/long data\n",
    "df_geo = df_subset.dropna(subset=['latitude', 'longitude'], axis=0, inplace=False)\n",
    "\n",
    "print('We have {} geotagged rows'.format(len(df_geo)))\n",
    "df_geo.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Refuse and Recycling                            709\n",
       "General Questions/information                   103\n",
       "Streets, Utilities, and Transportation           96\n",
       "Parks, Trees and Vegetation                      39\n",
       "Environmental Services and Programs              14\n",
       "Business License                                  7\n",
       "Traffic and Transportation                        7\n",
       "Facilities, Electrical & Property Management      5\n",
       "Graffiti and Vandalism                            3\n",
       "Other Account Services and Billing                1\n",
       "Equipment Maintenance                             1\n",
       "Name: issue_type, dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# what is the distribution of issue types?\n",
    "df_geo['issue_type'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, convert each row in the dataframe to a geojson-formatted feature and save the result as a file. The format is pretty simple and you can see it here: http://geojson.org/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def df_to_geojson(df, properties, lat='latitude', lon='longitude'):\n",
    "    \"\"\"\n",
    "    Turn a dataframe containing point data into a geojson formatted python dictionary\n",
    "    \n",
    "    df : the dataframe to convert to geojson\n",
    "    properties : a list of columns in the dataframe to turn into geojson feature properties\n",
    "    lat : the name of the column in the dataframe that contains latitude data\n",
    "    lon : the name of the column in the dataframe that contains longitude data\n",
    "    \"\"\"\n",
    "    \n",
    "    # create a new python dict to contain our geojson data, using geojson format\n",
    "    geojson = {'type':'FeatureCollection', 'features':[]}\n",
    "\n",
    "    # loop through each row in the dataframe and convert each row to geojson format\n",
    "    for _, row in df.iterrows():\n",
    "        # create a feature template to fill in\n",
    "        feature = {'type':'Feature',\n",
    "                   'properties':{},\n",
    "                   'geometry':{'type':'Point',\n",
    "                               'coordinates':[]}}\n",
    "\n",
    "        # fill in the coordinates\n",
    "        feature['geometry']['coordinates'] = [row[lon],row[lat]]\n",
    "\n",
    "        # for each column, get the value and add it as a new feature property\n",
    "        for prop in properties:\n",
    "            feature['properties'][prop] = row[prop]\n",
    "        \n",
    "        # add this feature (aka, converted dataframe row) to the list of features inside our dict\n",
    "        geojson['features'].append(feature)\n",
    "    \n",
    "    return geojson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "useful_columns = ['street_address', 'issue_description', 'issue_type', 'ticket_status']\n",
    "geojson_dict = df_to_geojson(df_geo, properties=useful_columns)\n",
    "geojson_str = json.dumps(geojson_dict, indent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "985 geotagged features saved to file\n"
     ]
    }
   ],
   "source": [
    "# save the geojson result to a file\n",
    "output_filename = 'dataset.js'\n",
    "with open(output_filename, 'w') as output_file:\n",
    "    output_file.write('var dataset = {};'.format(geojson_str))\n",
    "    \n",
    "# how many features did we save to the geojson file?\n",
    "print('{} geotagged features saved to file'.format(len(geojson_dict['features'])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
